Data pre-processing
Import dataset
The phyloseq package is used here to combine the OTU table, taxonomic assignments, and sample metadata into a single R data object (class ‘phyloseq’) to facilitate downstream analyses.
# Import taxonomic assignment data from blast and nw
read.nw <- function(file) {
nw <- read.table(file, stringsAsFactors=FALSE)
nw <- nw[order(nw$otu),]
return(nw)
}
tax97 <- read.nw("data/otus_97/nw_tophits.tsv")
tax100 <- read.nw("data/otus_100/nw_tophits.tsv")
tax97bs <- read.nw("data/otus_97_bysample/nw_tophits.tsv")
tax97bsp <- read.nw("data/otus_97_byspecies/nw_tophits.tsv")
# Deal with identical taxonomic assignments (because some reference sequences are not unique...)
# Create names for groups of identical sequences based on members of group...
ident <- readLines("data/ITS2db_trimmed_notuniques_otus.txt")
ident <- gsub("denovo[0-9]*\t", "", ident)
ident <- strsplit(ident, split="\t")
ident2 <- lapply(ident, str_match_all, pattern="[A-I]{1}[0-9]{1,3}.*[_/]")
ident2 <- lapply(ident2, function(g) gsub(pattern="\\..*_$", "", x=unlist(g)))
ident2 <- lapply(ident2, function(g) gsub(pattern="_$", "", g))
ident2 <- lapply(ident2, function(g) unlist(strsplit(g, split="/")))
subtypes <- lapply(ident2, function(x) levels(factor(unlist(x))))
subtypes <- lapply(subtypes, function(s) paste(paste(s, collapse="/"), "_multiple", sep=""))
names(ident) <- subtypes
ident <- melt(do.call(rbind, ident))
## Warning in (function (..., deparse.level = 1) : number of columns of result
## is not a multiple of vector length (arg 6)
ident <- unique(ident[order(ident[,1]), c(3,1)])
# Replace any sequence name in taxonomy assignment that is a member of a group of identical sequences with the name of the group
collapse.idents <- function(df) {
within(df, {
for (i in 1:nrow(ident)) {
hit <- gsub(ident[i,1], ident[i,2], hit)
}
})
}
tax97 <- collapse.idents(tax97)
tax97bs <- collapse.idents(tax97bs)
tax97bsp <- collapse.idents(tax97bsp)
tax100 <- collapse.idents(tax100)
#tax97 <- get.st(tax97)
tax97 <- with(tax97, tax97[order(otu, -sim), ])
tax97 <- tax97[!duplicated(tax97$otu), ]
rownames(tax97) <- tax97$otu
#tax97bs <- get.st(tax97bs)
tax97bs <- with(tax97bs, tax97bs[order(otu, -sim), ])
tax97bs <- tax97bs[!duplicated(tax97bs$otu), ]
rownames(tax97bs) <- tax97bs$otu
#tax97bsp <- get.st(tax97bsp)
tax97bsp <- with(tax97bsp, tax97bsp[order(otu, -sim), ])
tax97bsp <- tax97bsp[!duplicated(tax97bsp$otu), ]
rownames(tax97bsp) <- tax97bsp$otu
#tax100 <- get.st(tax100)
tax100 <- with(tax100, tax100[order(otu, -sim), ])
tax100 <- tax100[!duplicated(tax100$otu), ]
rownames(tax100) <- tax100$otu
# Import OTU tables
otu97 <- otu_table(read.table("data/otus_97/97_otus.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
otu97bs <- otu_table(read.table("data/otus_97_bysample/97_otus_bysample.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
otu97bsp <- otu_table(read.table("data/otus_97_byspecies/97_otus_byspecies.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
otu100 <- otu_table(read.table("data/otus_100/100_otus.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
# Import sample data
sam <- sample_data(read.delim("data/mapping_file.txt", sep="\t", header=T, row.names=1))
# Build phyloseq objects
phy97 <- phyloseq(otu97, tax_table(as.matrix(tax97)), sam)
phy97bs <- phyloseq(otu97bs, tax_table(as.matrix(tax97bs)), sam)
phy97bsp <- phyloseq(otu97bsp, tax_table(as.matrix(tax97bsp)), sam)
phy100 <- phyloseq(otu100, tax_table(as.matrix(tax100)), sam)
Filter dataset
# FILTER OUT OTUS THAT ARE NOT SYMBIODINIUM
# If the top hit in NCBI does not contain the string "Symbiodinium", then this sequence is assumed to not be Symbiodinium.
poormatch97 <- readLines("data/otus_97/poormatch_IDs.txt")
poormatch97 <- data.frame(otu=str_extract(poormatch97, "denovo[^ ]*"),
symbio=str_detect(poormatch97, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio97 <- poormatch97[which(poormatch97$symbio==F), 1]
poormatch97bs <- readLines("data/otus_97_bysample/poormatch_IDs.txt")
poormatch97bs <- data.frame(otu=str_extract(poormatch97bs, "denovo[^ ]*"),
symbio=str_detect(poormatch97bs, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio97bs <- poormatch97bs[which(poormatch97bs$symbio==F), 1]
poormatch97bsp <- readLines("data/otus_97_byspecies/poormatch_IDs.txt")
poormatch97bsp <- data.frame(otu=str_extract(poormatch97bsp, "denovo[^ ]*"),
symbio=str_detect(poormatch97bsp, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio97bsp <- poormatch97bsp[which(poormatch97bsp$symbio==F), 1]
poormatch100 <- readLines("data/otus_100/poormatch_IDs.txt")
poormatch100 <- data.frame(otu=str_extract(poormatch100, "denovo[^ ]*"),
symbio=str_detect(poormatch100, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio100 <- poormatch100[which(poormatch100$symbio==F), 1]
# Remove OTUs that are not Symbiodinium
phy97.f <- prune_taxa(!taxa_names(phy97) %in% notsymbio97, phy97)
phy97bs.f <- prune_taxa(!taxa_names(phy97bs) %in% notsymbio97bs, phy97bs)
phy97bsp.f <- prune_taxa(!taxa_names(phy97bsp) %in% notsymbio97bsp, phy97bsp)
phy100.f <- prune_taxa(!taxa_names(phy100) %in% notsymbio100, phy100)
# Filter OTUs by minimum count
# Set threshold count
n <- 10
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa97bs <- taxa_sums(phy97bs.f)[which(taxa_sums(phy97bs.f) >= n)]
taxa97bsp <- taxa_sums(phy97bsp.f)[which(taxa_sums(phy97bsp.f) >= n)]
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy97bs.f <- prune_taxa(names(taxa97bs), phy97bs.f)
phy97bsp.f <- prune_taxa(names(taxa97bsp), phy97bsp.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)
# Filter samples by minimum count
# Set threshold number of reads
sn <- 200
# Remove samples with fewer reads than threshold
phy97.f <- prune_samples(sample_sums(phy97.f)>=sn, phy97.f)
phy97bs.f <- prune_samples(sample_sums(phy97bs.f)>=sn, phy97bs.f)
phy97bsp.f <- prune_samples(sample_sums(phy97bsp.f)>=sn, phy97bsp.f)
phy100.f <- prune_samples(sample_sums(phy100.f)>=sn, phy100.f)
# Filter OTUs by minimum count again in case any dropped below threshold after filtering samples
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa97bs <- taxa_sums(phy97bs.f)[which(taxa_sums(phy97bs.f) >= n)]
taxa97bsp <- taxa_sums(phy97bsp.f)[which(taxa_sums(phy97bsp.f) >= n)]
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy97bs.f <- prune_taxa(names(taxa97bs), phy97bs.f)
phy97bsp.f <- prune_taxa(names(taxa97bsp), phy97bsp.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)
# Label clades and subtypes for filtered phyloseq object tax_tables
get.st <- function(df) {
within(df, {
Clade <- substr(hit, 1, 1)
Subtype <- gsub(hit, pattern="_[A-Z]{2}[0-9]{6}", replacement="")
Subtype <- gsub(Subtype, pattern="_multiple", replacement="")
Subtype2 <- ifelse(as.numeric(sim)==100, paste0("'", Subtype, "'"),
paste0("'[", rep(rle(sort(Subtype))$values, times=rle(sort(Subtype))$lengths), "]'^",
unlist(lapply(rle(sort(Subtype))$lengths, seq_len)))[order(order(Subtype))])
#Subtype <- ifelse(as.numeric(sim)==100, Subtype, paste("*", Subtype, sep=""))
})
}
tax_table(phy97.f) <- as.matrix(get.st(data.frame(tax_table(phy97.f), stringsAsFactors=FALSE)))
tax_table(phy97bs.f) <- as.matrix(get.st(data.frame(tax_table(phy97bs.f), stringsAsFactors=FALSE)))
tax_table(phy97bsp.f) <- as.matrix(get.st(data.frame(tax_table(phy97bsp.f), stringsAsFactors=FALSE)))
tax_table(phy100.f) <- as.matrix(get.st(data.frame(tax_table(phy100.f), stringsAsFactors=FALSE)))
- Minimum count to retain OTU: 10
- Minimum count to retain sample: 200
Descriptive statistics
Filtered dataset
# Compute summary statistics
stats97.f <- data.frame(`97% OTUs`=t(phystats(phy97.f)), check.names=F)
stats97bs.f <- data.frame(`97% within-sample OTUs`=t(phystats(phy97bs.f)), check.names=F)
stats97bsp.f <- data.frame(`97% within-species OTUs`=t(phystats(phy97bsp.f)), check.names=F)
stats100.f <- data.frame(`100% OTUs`=t(phystats(phy100.f)), check.names=F)
# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97.f)), plot=F)
taxhist97bs <- hist(log10(taxa_sums(phy97bs.f)), plot=F)
taxhist97bsp <- hist(log10(taxa_sums(phy97bsp.f)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100.f)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97.f)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs.f)), plot=F)
samhist97bsp <- hist(log10(sample_sums(phy97bsp.f)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100.f)), plot=F)
par(mfrow=c(2, 4), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% sample OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bsp, col="black", main="97% species OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% sample OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bsp, col="black", main="97% species OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)

# Create stats table
knitr::kable(cbind(stats97.f, stats97bs.f, stats97bsp.f, stats100.f))
| Total count in OTU table |
1489768 |
1489657 |
1489781 |
943643 |
| Number of OTUs |
94 |
106 |
86 |
4718 |
| Range of OTU counts |
10 - 742671 |
10 - 472752 |
10 - 566295 |
10 - 171212 |
| Number of singleton OTUs |
0 |
0 |
0 |
0 |
| Number of samples |
80 |
80 |
80 |
80 |
| Range of reads per sample |
706 - 169884 |
707 - 169890 |
707 - 169893 |
485 - 97003 |
| Arithmetic mean (±SD) reads per sample |
18622 ± 21102 |
18620 ± 21103 |
18622 ± 21103 |
11795 ± 12744 |
| Geometric mean (±SD) reads per sample |
13141 ± 2 |
13137 ± 2 |
13141 ± 2 |
8141 ± 2 |
Raw dataset
# Compute summary statistics
stats97 <- data.frame(`97% OTUs`=t(phystats(phy97)), check.names=F)
stats97bs <- data.frame(`97% within-sample OTUs`=t(phystats(phy97bs)), check.names=F)
stats97bsp <- data.frame(`97% within-species OTUs`=t(phystats(phy97bsp)), check.names=F)
stats100 <- data.frame(`100% OTUs`=t(phystats(phy100)), check.names=F)
# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97)), plot=F)
taxhist97bs <- hist(log10(taxa_sums(phy97bs)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs)), plot=F)
taxhist97bsp <- hist(log10(taxa_sums(phy97bsp)), plot=F)
samhist97bsp <- hist(log10(sample_sums(phy97bsp)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100)), plot=F)
par(mfrow=c(2, 4), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% within-sample OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bsp, col="black", main="97% within-species OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% within-sample OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% within-species OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)

# Create stats table
knitr::kable(cbind(stats97, stats97bs, stats97bsp, stats100))
| Total count in OTU table |
1490760 |
1490720 |
1490757 |
1055405 |
| Number of OTUs |
130 |
179 |
124 |
41390 |
| Range of OTU counts |
2 - 742681 |
2 - 472753 |
2 - 566413 |
2 - 171257 |
| Number of singleton OTUs |
0 |
0 |
0 |
0 |
| Number of samples |
84 |
84 |
84 |
84 |
| Range of reads per sample |
3 - 169903 |
3 - 169901 |
3 - 169901 |
1 - 112784 |
| Arithmetic mean (±SD) reads per sample |
17747 ± 20969 |
17746 ± 20969 |
17747 ± 20969 |
12564 ± 14471 |
| Geometric mean (±SD) reads per sample |
9420 ± 5 |
9393 ± 5 |
9410 ± 5 |
6449 ± 6 |
Clade overview
Here the composition of each sample is plotted as a horizontal bar, sorted by species and shore. Each segment of the bar represents an individual OTU and is colored by clade. This plot is a nice overview of the whole dataset but only provides coarse information at the clade level.
cladeAbund <- aggregate(data.frame(RelAbund=rowSums(otu_table(phy97bs.f.p))),
by=list(Clade=data.frame(tax_table(phy97bs.f.p))$Clade), FUN=sum)
cladeAbund$Prop <- prop.table(cladeAbund$RelAbund)
bars <- barplot(cladeAbund$Prop*100, col=taxcolors, space=0,
names.arg=cladeAbund$Clade, xlab=expression(paste(italic('Symbiodinium'), " Clade")),
ylab="Relative abundance (%)")
text(bars, cladeAbund$Prop*100+2, labels=round(cladeAbund$Prop*100, 1), xpd=T)

cladeAbund$Notus <- table(data.frame(tax_table(phy97bs.f.p))$Clade)
par(mfrow=c(1,1), mar=c(2, 1.5, 2, 5), lwd=0.1, cex=0.7)
# Plot composition of 97% within-sample OTUs colored by clade
composition(phy97bs.f.p, col=taxcolors[factor(data.frame(tax_table(phy97bs.f.p))[order(data.frame(tax_table(phy97bs.f.p))$Subtype), ]$Clade, levels=c("A","B","C","D","F","G"))], legend=T)

# Plot composition of 97% within-sample OTUs colored by OTU
#composition(phy97bs.f.p, col=rainbow(ntaxa(phy97bs.f.p)), legend=F)
Symbiodinium in each coral
For each coral species, barplots are presented showing the relative abundance of OTUs obtained by 100%, 97%, and 97%-within-sample clustering. OTUs comprising more than 4% of a sample are labeled with the unique OTU number and the Symbiodinium subtype and NCBI GenBank accession number of the closest BLAST hit for that OTU in the reference database. OTU numbers and barplot colors are NOT comparable across the three clustering methods.
Diploria strigosa
Notice how 97% OTUs and 97% within-sample OTUs are NOT THE SAME! Specifically, notice how in the 97% OTU dataset, all samples except two are dominated by the same OTU, denovo235. In the 97% within-sample OTU dataset, those samples are dominated by three different OTUs – denovo73, denovo231, and denovo10. Look at the 100% OTU dataset to see the composition of unique sequence variants in these samples. Indeed, these samples are dominated by different sequence variants. However, in the 97% OTU approach, they have all been collapsed into the same OTU, while in the 97% within-sample clustering approach, samples with different dominant sequence variants are maintained as separate OTUs, even though these different sequence variants are more than 97% similar to each other. I suggest that the within-sample clustering approach is a better way of treating Symbiodinium ITS2 sequence data.
# Create subsetted phyloseq objects for Diploria strigosa
dstr97.f.p <- subset_samples(phy97.f.p, Species=="strigosa")
dstr97bs.f.p <- subset_samples(phy97bs.f.p, Species=="strigosa")
dstr97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="strigosa")
dstr100.f.p <- subset_samples(phy100.f.p, Species=="strigosa")
# Plot custom barplots for Diploria strigosa
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dstr97.f.p, main="97% OTUs")
otubarplot(dstr97bsp.f.p, main="97% within-species OTUs")
otubarplot(dstr97bs.f.p, main="97% within-sample OTUs")
otubarplot(dstr100.f.p, main="100% OTUs")

dstr.net <- makenet(dstr97bs.f.p, 0)
set.seed(54538)
plotnet(dstr.net)

Network analysis for D. strigosa
Number of OTUs: 38
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Porites furcata
# Create subsetted phyloseq objects for Porites furcata
pfur97.f.p <- subset_samples(phy97.f.p, Species=="furcata")
pfur97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="furcata")
pfur97bs.f.p <- subset_samples(phy97bs.f.p, Species=="furcata")
pfur100.f.p <- subset_samples(phy100.f.p, Species=="furcata")
# Plot custom barplots for Porites furcata
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(pfur97.f.p, main="97% OTUs")
otubarplot(pfur97bsp.f.p, main="97% within-species OTUs")
otubarplot(pfur97bs.f.p, main="97% within-sample OTUs")
otubarplot(pfur100.f.p, main="100% OTUs")

# Network analysis
pfur.net <- makenet(pfur97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(pfur.net)

Network analysis for P. furcata
Number of OTUs: 24
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Orbicella annularis
# Create subsetted phyloseq objects for Orbicella annularis
oann97.f.p <- subset_samples(phy97.f.p, Species=="annularis")
oann97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="annularis")
oann97bs.f.p <- subset_samples(phy97bs.f.p, Species=="annularis")
oann100.f.p <- subset_samples(phy100.f.p, Species=="annularis")
# Plot custom barplots for Orbicella annularis
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(oann97.f.p, main="97% OTUs")
otubarplot(oann97bsp.f.p, main="97% within-species OTUs")
otubarplot(oann97bs.f.p, main="97% within-sample OTUs")
otubarplot(oann100.f.p, main="100% OTUs")

# Network analysis
oann.net <- makenet(oann97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(oann.net)

Network analysis for O. annularis
Number of OTUs: 4
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Millepora alcicornis
# Create subsetted phyloseq objects for Millepora alcicornis
malc97.f.p <- subset_samples(phy97.f.p, Species=="alcicornis")
malc97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="alcicornis")
malc97bs.f.p <- subset_samples(phy97bs.f.p, Species=="alcicornis")
malc100.f.p <- subset_samples(phy100.f.p, Species=="alcicornis")
# Plot custom barplots for Millepora alcicornis
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(malc97.f.p, main="97% OTUs")
otubarplot(malc97bsp.f.p, main="97% within-sample OTUs")
otubarplot(malc97bs.f.p, main="97% within-sample OTUs")
otubarplot(malc100.f.p, main="100% OTUs")

# Network analysis
malc.net <- makenet(malc97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(malc.net)

Network analysis for M. alcicornis
Number of OTUs: 15
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Siderastrea siderea
# Create subsetted phyloseq objects for Siderastrea siderea
ssid97.f.p <- subset_samples(phy97.f.p, Species=="siderea")
ssid97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="siderea")
ssid97bs.f.p <- subset_samples(phy97bs.f.p, Species=="siderea")
ssid100.f.p <- subset_samples(phy100.f.p, Species=="siderea")
# Plot custom barplots for Siderastrea siderea
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ssid97.f.p, main="97% OTUs")
otubarplot(ssid97bsp.f.p, main="97% within-species OTUs")
otubarplot(ssid97bs.f.p, main="97% within-sample OTUs")
otubarplot(ssid100.f.p, main="100% OTUs")

# Network analysis
ssid.net <- makenet(ssid97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(834)
plotnet(ssid.net)

Network analysis for S. siderea
Number of OTUs: 16
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Favia fragum
# Create subsetted phyloseq objects for Favia fragum
ffra97.f.p <- subset_samples(phy97.f.p, Species=="fragum")
ffra97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="fragum")
ffra97bs.f.p <- subset_samples(phy97bs.f.p, Species=="fragum")
ffra100.f.p <- subset_samples(phy100.f.p, Species=="fragum")
# Plot custom barplots for Favia fragum
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ffra97.f.p, main="97% OTUs")
otubarplot(ffra97bsp.f.p, main="97% within-species OTUs")
otubarplot(ffra97bs.f.p, main="97% within-sample OTUs")
otubarplot(ffra100.f.p, main="100% OTUs")

# Network analysis
ffra.net <- makenet(ffra97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(ffra.net)

Network analysis for F. fragum
Number of OTUs: 13
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Siderastrea radians
# Create subsetted phyloseq objects for Siderastrea radians
srad97.f.p <- subset_samples(phy97.f.p, Species=="radians")
srad97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="radians")
srad97bs.f.p <- subset_samples(phy97bs.f.p, Species=="radians")
srad100.f.p <- subset_samples(phy100.f.p, Species=="radians")
# Plot custom barplots for Siderastrea radians
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(srad97.f.p, main="97% OTUs")
otubarplot(srad97bsp.f.p, main="97% within-species OTUs")
otubarplot(srad97bs.f.p, main="97% within-sample OTUs")
otubarplot(srad100.f.p, main="100% OTUs")

# Network analysis
srad.net <- makenet(srad97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(srad.net)

Network analysis for S. radians
Number of OTUs: 14
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Porites astreoides
# Create subsetted phyloseq objects for Porites astreoides
past97.f.p <- subset_samples(phy97.f.p, Species=="astreoides")
past97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="astreoides")
past97bs.f.p <- subset_samples(phy97bs.f.p, Species=="astreoides")
past100.f.p <- subset_samples(phy100.f.p, Species=="astreoides")
# Plot custom barplots for Porites astreoides
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(past97.f.p, main="97% OTUs")
otubarplot(past97bsp.f.p, main="97% within-species OTUs")
otubarplot(past97bs.f.p, main="97% within-sample OTUs")
otubarplot(past100.f.p, main="100% OTUs")

# Network analysis
past.net <- makenet(past97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(past.net)

Network analysis for P. astreoides
Number of OTUs: 9
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Dendrogyra cylindrus
# Create subsetted phyloseq objects for Dendrogyra cylindrus
dcyl97.f.p <- subset_samples(phy97.f.p, Species=="cylindrus")
dcyl97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="cylindrus")
dcyl97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cylindrus")
dcyl100.f.p <- subset_samples(phy100.f.p, Species=="cylindrus")
# Plot custom barplots for Dendrogyra cylindrus
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dcyl97.f.p, main="97% OTUs")
otubarplot(dcyl97bsp.f.p, main="97% within-species OTUs")
otubarplot(dcyl97bs.f.p, main="97% within-sample OTUs")
otubarplot(dcyl100.f.p, main="100% OTUs")

# Network analysis
dcyl.net <- makenet(dcyl97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(dcyl.net)

Network analysis for D. cylindrus
Number of OTUs: 14
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Montastraea cavernosa
# Create subsetted phyloseq objects for Montastraea cavernosa
mcav97.f.p <- subset_samples(phy97.f.p, Species=="cavernosa")
mcav97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="cavernosa")
mcav97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cavernosa")
mcav100.f.p <- subset_samples(phy100.f.p, Species=="cavernosa")
# Plot custom barplots for Montastraea cavernosa
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(mcav97.f.p, main="97% OTUs")
otubarplot(mcav97bsp.f.p, main="97% within-species OTUs")
otubarplot(mcav97bs.f.p, main="97% within-sample OTUs")
otubarplot(mcav100.f.p, main="100% OTUs")

# Network analysis
mcav.net <- makenet(mcav97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(mcav.net)

Network analysis for M. cavernosa
Number of OTUs: 5
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
Beta diversity
- Beta diversity is evaluated here as the multivariate dispersion of samples within a coral species. Principal coordinate analysis on Bray-Curtis dissimilarities is used to calculate average distance-to-centroid values for each species group, which are then compared statistically by a permutation test. This analysis is implemented using betadisper() in the vegan package, based on Anderson (2006).
- In this context, beta diveristy is analogous to ‘flexibility’ in symbiosis – how different can corals of the same species be in terms of their symbiont communities?
- The species with highest beta diversity are D. strigosa, P. furcata, and M. alcicornis, followed by O. annularis and S. siderea. Species with the lowest beta diversity are F. fragum, D. cylindrus, S. radians, P. astreoides, and M. cavernosa.
97% within-sample OTUs
betad97bs <- betad(phy97bs.f.t, group="Species")
with(betad97bs$sambdsumm.ord, {
plot(mean, type="n", main="97% within-sample OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(sam$Species)[order(betad97bs$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad97bs$saml$Letters[as.character(group)], cex=0.5)
})

100% OTUs
betad100 <- betad(phy100.f.t, group="Species")
with(betad100$sambdsumm.ord, {
plot(mean, type="n", main="100% OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(sam$Species)[order(betad100$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad100$saml$Letters[as.character(group)], cex=0.5)
})

97% OTUs
betad97 <- betad(phy97.f.t, group="Species")
# Figure: Distance to centroids
with(betad97$sambdsumm.ord, {
plot(mean, type="n", main="97% OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(sam$Species)[order(betad97$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad97$saml$Letters[as.character(group)], cex=0.5)
})

Differences between species
Whether Symbiodinium communities differ among species is evaluated using permutational analysis of variance (PERMANOVA).
# PERMANOVA for difference among species
sppdiffs <- function(phy) {
permanova.spp.t <- adonis(phyloseq::distance(phy, "bray") ~ Species,
data=as(sample_data(phy), "data.frame"), permutations=9999)
permanova.spp.t
# Pairwise PERMANOVA tests for differences between species
dmat <- as(phyloseq::distance(phy, "bray"), "matrix")
df <- as(sample_data(phy), "data.frame")
pairs <- data.frame(t(combn(levels(df$Species), 2)), stringsAsFactors=F)
p.pairs <- setNames(rep(NA, nrow(pairs)), with(pairs, paste(X1, X2, sep='-')))
for (i in 1:nrow(pairs)) {
dfs <- subset(df, Species %in% c(pairs[i,1], pairs[i,2]))
dmats <- dmat[rownames(dfs), rownames(dfs)]
set.seed(152470)
permanova <- adonis(dmats ~ Species, data=dfs, permutations=999)
p.pairs[i] <- permanova$aov.tab$`Pr(>F)`[1]
}
# Return letters signifying differences
return(multcompLetters(p.pairs))
}
sppdiffs97 <- sppdiffs(phy97.f.t)
sppdiffs97bs <- sppdiffs(phy97bs.f.t)
sppdiffs100 <- sppdiffs(phy100.f.t)
sppdiffs <- data.frame("97% within-sample OTUs"=sppdiffs97bs$Letters,
"100% OTUs"=sppdiffs100$Letters,
"97% OTUs"=sppdiffs97$Letters, check.names=F)
knitr::kable(sppdiffs, caption="Species not sharing a letter are significantly different (p < 0.05)")
Species not sharing a letter are significantly different (p < 0.05)
| alcicornis |
ab |
a |
a |
| annularis |
ab |
b |
bc |
| astreoides |
c |
c |
d |
| cavernosa |
d |
d |
e |
| cylindrus |
a |
e |
f |
| fragum |
ab |
f |
a |
| furcata |
e |
g |
b |
| radians |
f |
h |
g |
| siderea |
d |
d |
g |
| strigosa |
b |
b |
ac |
Results:
- At the 97% within-sample OTU scale, Symbiodinium communities are not significantly different in the corals M. alcicornis, O. annularis, and F. fragum, as these corals are all dominantly associated with the same Symbiodinium OTU (BLAST identity=B1). These corals are also not different from D. cylindrus or D. strigosa, which also dominantly associated with the same B1 OTU, although the latter two species are different from each other. Meanwhile, M. cavernosa and S. siderea also share similar symbiont communities dominated by Symbiodinium C3. Finally, S. radians, P. furcata, and P. astreoides each have distinct, significantly different Symbiodinium communities.
- At the 100% OTU scale, Symbiodinium communities were significantly differentiated in all species except for O. annularis and D. strigosa (sharing B1-dominated communities), and S. siderea and M. cavernosa (sharing C3-dominated communities).